# Scrapy settings for jdSkuSpider project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html

import os

# Project identity: the bot name and the package Scrapy searches for spiders
# (and where `scrapy genspider` creates new ones).
BOT_NAME = 'jdSkuSpider'

SPIDER_MODULES = ['jdSkuSpider.spiders']
NEWSPIDER_MODULE = 'jdSkuSpider.spiders'

# ---------------------------------------------------------------------------
# Deployment-specific endpoints and credentials.
#
# Every value read through os.environ.get() below can be overridden with the
# named JDSKU_* environment variable; when the variable is unset the original
# hard-coded value is used, so existing deployments keep working unchanged.
#
# SECURITY NOTE(review): the fallback literals are credentials committed to
# source control. Treat them as compromised: rotate them, supply the real
# values through the environment, and then remove the literals.
# ---------------------------------------------------------------------------

# Proxy API URL issued by the proxy vendor (the query string carries a `key`
# parameter).
PROXY_SERVER_URL = os.environ.get(
    'JDSKU_PROXY_SERVER_URL',
    'https://h.shanchendaili.com/api.html?action=get_ip&key=HU015c86520413222339Rp2a&time=10&count=1&protocol=http&type=text&textSep=1&only=1',
)
# Page used to test whether the current proxy IP has been banned.
TEST_PAGE = 'https://item.jd.com/100017846659.html'
# De-duplication switch: True = do NOT de-duplicate, False = de-duplicate.
DONT_FILTER = True

# Redis connection used by scrapy_redis.
REDIS_HOST = os.environ.get('JDSKU_REDIS_HOST', '120.24.87.40')
# NOTE(review): kept as a string exactly as before; scrapy-redis documents an
# integer port, but other project code may rely on the string form - confirm
# before converting.
REDIS_PORT = os.environ.get('JDSKU_REDIS_PORT', '6379')
REDIS_DB = 1
REDIS_PARAMS = {
    'password': os.environ.get('JDSKU_REDIS_PASSWORD', 'Guet@207'),
}
# Use the scrapy_redis scheduler so that every host taking part in the crawl
# fetches different URLs.
SCHEDULER = 'scrapy_redis.scheduler.Scheduler'
# De-duplication class used by Scrapy: scrapy_redis' RFPDupeFilter.
DUPEFILTER_CLASS = 'scrapy_redis.dupefilter.RFPDupeFilter'
# Serializer used for requests stored by the scheduler.
SCHEDULER_SERIALIZER = "scrapy_redis.picklecompat"
# AutoThrottle: control the crawl speed to reduce load on the target server.
AUTOTHROTTLE_ENABLED = True

# MongoDB connection parameters.
# Lowercase name: Scrapy's settings loader only registers UPPERCASE module
# attributes, so this dict is not a Scrapy setting; consumers presumably
# import it from this module - verify against the pipelines.
mongodb = {
    'host': os.environ.get('JDSKU_MONGODB_HOST', '120.24.87.40'),
    # Environment values are strings, so convert to keep the port an int.
    'port': int(os.environ.get('JDSKU_MONGODB_PORT', '27017')),
    'username': os.environ.get('JDSKU_MONGODB_USERNAME', "admin"),
    'password': os.environ.get('JDSKU_MONGODB_PASSWORD', "123456"),
}


# Cookie name/value pairs for jd.com requests - presumably captured from a
# logged-in browser session; verify against the code that consumes them.
# Lowercase name: Scrapy's settings loader only registers UPPERCASE module
# attributes, so this dict is not a Scrapy setting and has to be imported
# from this module by whatever uses it.
# NOTE(review): entries such as `pin`, `thor` and `token` look like account /
# session identifiers. Treat the values as sensitive and expect them to
# expire - confirm how they are meant to be refreshed.
cookies = {
    'shshshfpa': '88442a32-4008-3016-2ed4-5f00ba6e02a6-1583306994',
    '__jdu': '1645417385963897370071',
    'shshshfpb': 'eKzbp55ekVbG%2BmXNii7FmnA%3D%3D',
    'unpl': 'JF8EAKpnNSttC0hdDR9XGhMQTw5XW15YGURUPDQCVFwIHFEHGQoSExZ7XlVdXhRKFR9vZxRXXlNKUQ4fBCsSE3tdVV9cD0gVBWduNWRtW0tkBCsCHBcUTl1SXFQMQxABZm8DVltZSlIFKzIcEhl7bWRbXQlKFQVpZAVXbVl7VgQaAB8XFEJcU24cZk4XAmZlSFRaXU9RBR0AEhYYTF9dVlsKTRYCaWc1VlReQ1I1GA',
    '__jdv': '76161171|baidu|-|organic|notset|1648984819662',
    'areaId': '20',
    'PCSYCityID': 'CN_450000_450300_0',
    'ipLoc-djd': '20-1726-22884-51455',
    'pinId': 'bxSvmv8CNc-KKJVv2AYoCLV9-x-f3wj7',
    'pin': 'jd_701eaef2a29ff',
    'unick': '%E6%9D%9C%E6%92%B0%E4%B8%AD%E7%9A%84%E6%9C%AA%E5%90%8D',
    '_tp': 'Wy5hpT1Zse8ScnmqROf6D38%2BJ8IK3ESqvwXymUySsLE%3D',
    '_pst': 'jd_701eaef2a29ff',
    'shshshfp': '9dc45f7c100e4d56fc0ddb7d96bc59ab',
    '__jdc': '122270672',
    '__jda': '122270672.1645417385963897370071.1645417385.1649430078.1649491583.20',
    'thor': 'FC76D1F441FDD23810222D65B21A1E62936594BACBBD52CF88EB830809C78B7F75DEEDA34458FE020C54AF1DD6E55F389B2063BCCBBD829B977631DD6FB67A17BA374FEFAA00AF1C8264A11F080FA449884B327D73A31031D1F35232730745A6BD1570FDB90E15A4002FCBF4C8CDED1F588BFA29B2272823A7263C9A88F289B84E2075F1A710BED202A651BD7ABBC09D354497FAFBB4A0A7CBDC9590803F6162',
    'ceshi3.com': '000',
    'token': '5a5e7384ead314efe3237efb8d9825fb,3,916384',
    '__tk': 'OINnrcq4NDN5NiuEqcrdriKiOcJ5sINEsca4rfKiqIhgNLbhOIN4sG,3,916384',
    'ip_cityCode': '1726',
    'wlfstk_smdl': 'xrr3qwmzs7zoj7y6kbt5nfkai2clqo2n',
    'shshshsID': '3c84311b6180b15c5bf06ba762b90ce2_7_1649491714829',
    '__jdb': '122270672.15.1645417385963897370071|20.1649491583',
    '3AB9D23F7A4B3C9B': '3WE3SSBAN2EJKLE3VP6ZK2I4UOCXTYVZTD7O46KFL2S7J2UVXVZ7IJGBJKC4S3RAWL2DRAMRLPN63TK3LHWTA5JVQQ',
}

# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'jdSkuSpider (+http://www.yourdomain.com)'

# robots.txt rules are NOT obeyed by this project.
ROBOTSTXT_OBEY = False

# Configure maximum concurrent requests performed by Scrapy (default: 16)
#CONCURRENT_REQUESTS = 32

# Delay, in seconds, between requests to the same website (Scrapy default: 0).
# AUTOTHROTTLE_ENABLED is set to True elsewhere in this file; per the Scrapy
# AutoThrottle documentation the throttle never goes below this value, so it
# acts as the minimum delay.
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 1
# The download delay setting will honor only one of:
#CONCURRENT_REQUESTS_PER_DOMAIN = 16
#CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
#COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
#TELNETCONSOLE_ENABLED = False

# Default headers sent with every request; they imitate a desktop Chrome 100
# navigation to item.jd.com.
# NOTE(review): `authority` appears to mirror the HTTP/2 `:authority`
# pseudo-header of a copied browser request; sent as an ordinary header it
# probably has no effect - confirm before relying on it.
DEFAULT_REQUEST_HEADERS = {
    'authority': 'item.jd.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'zh,zh-CN;q=0.9',
    'cache-control': 'max-age=0',
    # No `cookie` header is set here. The cookie values are kept in the
    # module-level `cookies` dict instead of being duplicated in a comment.
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36',
}

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
#SPIDER_MIDDLEWARES = {
#    'jdSkuSpider.middlewares.JdskuspiderSpiderMiddleware': 543,
#}

# Project downloader middlewares, keyed by import path. The integer is the
# order value: Scrapy calls process_request() in increasing order, so
# ProxyMiddleWare (542) sees each request just before
# JdskuspiderDownloaderMiddleware (543).
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
    'jdSkuSpider.middlewares.ProxyMiddleWare': 542,
    'jdSkuSpider.middlewares.JdskuspiderDownloaderMiddleware': 543,
}

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
#EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
#}

# Item pipelines, keyed by import path. Items pass through the pipelines in
# increasing order value, so they are listed here in execution order: the
# project pipeline (300) first, then scrapy_redis' RedisPipeline (400).
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
    'jdSkuSpider.pipelines.JdskuspiderPipeline': 300,
    'scrapy_redis.pipelines.RedisPipeline': 400,
}

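# Configure the AutoThrottle extension (disabled by default in Scrapy)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# NOTE: AUTOTHROTTLE_ENABLED is already set to True earlier in this file, so
# the commented-out line below is redundant; the remaining options only tune it.
#AUTOTHROTTLE_ENABLED = True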
# The initial download delay
#AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
#AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
#AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
#AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
#HTTPCACHE_ENABLED = True
#HTTPCACHE_EXPIRATION_SECS = 0
#HTTPCACHE_DIR = 'httpcache'
#HTTPCACHE_IGNORE_HTTP_CODES = []
#HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
